import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import os
%matplotlib inline
# Directory that holds the three Spotify CSV exports. It defaults to the
# location this notebook was written against; set the SPOTIFY_DATA_DIR
# environment variable to run the notebook against a copy stored elsewhere.
data_dir = os.environ.get('SPOTIFY_DATA_DIR', 'C:\\Users\\RS')

# Track-level table, per-genre aggregates, and per-year aggregates.
spotify_data = pd.read_csv(os.path.join(data_dir, 'data.csv'))
genre_data = pd.read_csv(os.path.join(data_dir, 'data_by_genres.csv'))
data_by_year = pd.read_csv(os.path.join(data_dir, 'data_by_year.csv'))

# Print the column names, non-null counts and dtypes of the track table.
spotify_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 170653 entries, 0 to 170652 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 valence 170653 non-null float64 1 year 170653 non-null int64 2 acousticness 170653 non-null float64 3 artists 170653 non-null object 4 danceability 170653 non-null float64 5 duration_ms 170653 non-null int64 6 energy 170653 non-null float64 7 explicit 170653 non-null int64 8 id 170653 non-null object 9 instrumentalness 170653 non-null float64 10 key 170653 non-null int64 11 liveness 170653 non-null float64 12 loudness 170653 non-null float64 13 mode 170653 non-null int64 14 name 170653 non-null object 15 popularity 170653 non-null int64 16 release_date 170653 non-null object 17 speechiness 170653 non-null float64 18 tempo 170653 non-null float64 dtypes: float64(9), int64(6), object(4) memory usage: 24.7+ MB
# Print the column names, non-null counts and dtypes of the per-genre table.
genre_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2973 entries, 0 to 2972 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 mode 2973 non-null int64 1 genres 2973 non-null object 2 acousticness 2973 non-null float64 3 danceability 2973 non-null float64 4 duration_ms 2973 non-null float64 5 energy 2973 non-null float64 6 instrumentalness 2973 non-null float64 7 liveness 2973 non-null float64 8 loudness 2973 non-null float64 9 speechiness 2973 non-null float64 10 tempo 2973 non-null float64 11 valence 2973 non-null float64 12 popularity 2973 non-null float64 13 key 2973 non-null int64 dtypes: float64(11), int64(2), object(1) memory usage: 325.3+ KB
# Print the column names, non-null counts and dtypes of the per-year table.
data_by_year.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 100 entries, 0 to 99 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 mode 100 non-null int64 1 year 100 non-null int64 2 acousticness 100 non-null float64 3 danceability 100 non-null float64 4 duration_ms 100 non-null float64 5 energy 100 non-null float64 6 instrumentalness 100 non-null float64 7 liveness 100 non-null float64 8 loudness 100 non-null float64 9 speechiness 100 non-null float64 10 tempo 100 non-null float64 11 valence 100 non-null float64 12 popularity 100 non-null float64 13 key 100 non-null int64 dtypes: float64(11), int64(3) memory usage: 11.1 KB
import plotly.express as px
# Audio characteristics plotted against the release year; each one becomes
# its own line in the first chart.
sound_features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'valence',
]

# First chart: all sound features per year. Second chart: tempo per year.
# `fig` is left bound to the tempo figure once the loop finishes.
for y_columns in (sound_features, 'tempo'):
    fig = px.line(data_by_year, x='year', y=y_columns)
    fig.show()
# Box plot with danceability on the x axis and one row per genre on the y axis.
# NOTE(review): genre_data has 2973 rows, so this draws a very tall category
# axis; confirm that plotting every genre is intended.
sns.boxplot(data=genre_data, x='danceability', y='genres').set_title(
    "Danceability by Genre"
)
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Expects genre_data to be the per-genre DataFrame loaded earlier.

# Average danceability for every genre.
genre_danceability = genre_data.groupby('genres')['danceability'].mean()

# Names of the ten genres with the highest average, best first.
top_genres = (
    genre_danceability
    .sort_values(ascending=False)
    .head(10)
    .index
    .tolist()
)

# Keep only the rows that belong to one of those ten genres.
is_top_genre = genre_data['genres'].isin(top_genres)
genre_data_top10 = genre_data[is_top_genre]

# Stacked histogram of danceability, coloured by genre.
sns.histplot(
    data=genre_data_top10,
    x='danceability',
    hue='genres',
    multiple='stack',
)
plt.title("Danceability by Top 10 Genres")
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Expects genre_data to be the per-genre DataFrame loaded earlier.

# Average danceability for every genre.
genre_danceability = genre_data.groupby('genres')['danceability'].mean()

# The ten highest averages, best first. NOTE: this rebinds `top_genres` to a
# Series (genre name -> mean danceability).
top_genres = genre_danceability.sort_values(ascending=False).head(10)

# One distinct colour per bar; sized from the data so the palette always
# matches the number of bars actually drawn.
palette = sns.color_palette("hls", len(top_genres))

# Passing `palette` without `hue` is deprecated in seaborn 0.13, so the genre
# is assigned to `hue` as well. `dodge=False` keeps each bar at full width on
# seaborn versions that would otherwise split the category slot by hue level.
ax = sns.barplot(
    x=top_genres.values,
    y=top_genres.index,
    hue=top_genres.index,
    palette=palette,
    dodge=False,
)

# The hue legend only repeats the y-axis labels, so drop it when one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

plt.title("Top 10 Genres by Mean Danceability")
plt.xlabel("Mean Danceability")
plt.ylabel("Genre")
plt.show()
# The ten rows of genre_data with the highest popularity value.
top_10_genres = genre_data.nlargest(n=10, columns='popularity')

# Grouped bar chart: one group per genre, one bar per compared feature.
compared_features = ['valence', 'energy', 'danceability', 'acousticness']
fig = px.bar(
    top_10_genres,
    x='genres',
    y=compared_features,
    barmode='group',
)
fig.show()
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# Standardise the numeric genre features, then group the genres into 10
# clusters. A fixed random_state makes the cluster labels reproducible
# between runs; without it KMeans starts from different random centroids.
cluster_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('kmeans', KMeans(n_clusters=10, random_state=42)),
])

# Use every numeric column as a feature, except the label column written
# below. If this cell is re-run, 'cluster' already exists and is numeric, so
# it would otherwise be fed back in as an input feature.
X = genre_data.select_dtypes(np.number).drop(columns='cluster', errors='ignore')

cluster_pipeline.fit(X)
genre_data['cluster'] = cluster_pipeline.predict(X)
from sklearn.manifold import TSNE
# Standardise the features, then embed them in two dimensions with t-SNE.
# `init` and `learning_rate` are pinned to the values this notebook was run
# with: scikit-learn warns that both defaults change in version 1.2, so
# relying on them would silently alter the embedding after an upgrade.
# A fixed random_state makes the layout reproducible between runs.
tsne_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('tsne', TSNE(
        n_components=2,
        init='random',
        learning_rate=200.0,
        random_state=42,
        verbose=2,
    )),
])

# NOTE(review): relies on X still being the genre feature matrix built for
# the genre clustering step; confirm the cells are run in order.
genre_embedding = tsne_pipeline.fit_transform(X)

# Two-column frame of embedding coordinates, annotated with each genre's
# name and its cluster label for plotting.
projection = pd.DataFrame(columns=['x', 'y'], data=genre_embedding)
projection['genres'] = genre_data['genres']
projection['cluster'] = genre_data['cluster']
D:\G\ss\lib\site-packages\sklearn\manifold\_t_sne.py:780: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2. D:\G\ss\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning: The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.
[t-SNE] Computing 91 nearest neighbors... [t-SNE] Indexed 2973 samples in 0.148s... [t-SNE] Computed neighbors for 2973 samples in 0.373s... [t-SNE] Computed conditional probabilities for sample 1000 / 2973 [t-SNE] Computed conditional probabilities for sample 2000 / 2973 [t-SNE] Computed conditional probabilities for sample 2973 / 2973 [t-SNE] Mean sigma: 0.777516 [t-SNE] Computed conditional probabilities in 0.066s [t-SNE] Iteration 50: error = 81.9775620, gradient norm = 0.0043106 (50 iterations in 0.903s) [t-SNE] Iteration 100: error = 76.7715149, gradient norm = 0.0183697 (50 iterations in 0.759s) [t-SNE] Iteration 150: error = 76.2398071, gradient norm = 0.0047412 (50 iterations in 0.603s) [t-SNE] Iteration 200: error = 76.1542206, gradient norm = 0.0007833 (50 iterations in 0.941s) [t-SNE] Iteration 250: error = 76.1253738, gradient norm = 0.0004384 (50 iterations in 0.986s) [t-SNE] KL divergence after 250 iterations with early exaggeration: 76.125374 [t-SNE] Iteration 300: error = 1.8275498, gradient norm = 0.0011280 (50 iterations in 0.976s) [t-SNE] Iteration 350: error = 1.5905074, gradient norm = 0.0003895 (50 iterations in 0.867s) [t-SNE] Iteration 400: error = 1.5003856, gradient norm = 0.0002519 (50 iterations in 0.975s) [t-SNE] Iteration 450: error = 1.4568652, gradient norm = 0.0001596 (50 iterations in 0.893s) [t-SNE] Iteration 500: error = 1.4333408, gradient norm = 0.0001210 (50 iterations in 0.905s) [t-SNE] Iteration 550: error = 1.4192455, gradient norm = 0.0000979 (50 iterations in 0.907s) [t-SNE] Iteration 600: error = 1.4100113, gradient norm = 0.0001056 (50 iterations in 0.926s) [t-SNE] Iteration 650: error = 1.4039299, gradient norm = 0.0000735 (50 iterations in 0.884s) [t-SNE] Iteration 700: error = 1.3995304, gradient norm = 0.0000698 (50 iterations in 0.901s) [t-SNE] Iteration 750: error = 1.3961930, gradient norm = 0.0000670 (50 iterations in 0.932s) [t-SNE] Iteration 800: error = 1.3934026, gradient norm = 0.0000620 (50 iterations in 
0.933s) [t-SNE] Iteration 850: error = 1.3909042, gradient norm = 0.0000536 (50 iterations in 0.960s) [t-SNE] Iteration 900: error = 1.3887153, gradient norm = 0.0000481 (50 iterations in 0.864s) [t-SNE] Iteration 950: error = 1.3869228, gradient norm = 0.0000530 (50 iterations in 0.987s) [t-SNE] Iteration 1000: error = 1.3850130, gradient norm = 0.0000471 (50 iterations in 0.870s) [t-SNE] KL divergence after 1000 iterations: 1.385013
import plotly.express as px
# Scatter plot of the 2-D genre embedding, coloured by cluster label.
# Hovering over a point shows its coordinates and the genre name.
hover_columns = ['x', 'y', 'genres']
fig = px.scatter(
    data_frame=projection,
    x='x',
    y='y',
    color='cluster',
    hover_data=hover_columns,
)
fig.show()
# Display the per-genre table, which now includes the 'cluster' column.
genre_data
| mode | genres | acousticness | danceability | duration_ms | energy | instrumentalness | liveness | loudness | speechiness | tempo | valence | popularity | key | cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 21st century classical | 0.979333 | 0.162883 | 1.602977e+05 | 0.071317 | 0.606834 | 0.361600 | -31.514333 | 0.040567 | 75.336500 | 0.103783 | 27.833333 | 6 | 2 |
| 1 | 1 | 432hz | 0.494780 | 0.299333 | 1.048887e+06 | 0.450678 | 0.477762 | 0.131000 | -16.854000 | 0.076817 | 120.285667 | 0.221750 | 52.500000 | 5 | 1 |
| 2 | 1 | 8-bit | 0.762000 | 0.712000 | 1.151770e+05 | 0.818000 | 0.876000 | 0.126000 | -9.180000 | 0.047000 | 133.444000 | 0.975000 | 48.000000 | 7 | 8 |
| 3 | 1 | [] | 0.651417 | 0.529093 | 2.328809e+05 | 0.419146 | 0.205309 | 0.218696 | -12.288965 | 0.107872 | 112.857352 | 0.513604 | 20.859882 | 7 | 4 |
| 4 | 1 | a cappella | 0.676557 | 0.538961 | 1.906285e+05 | 0.316434 | 0.003003 | 0.172254 | -12.479387 | 0.082851 | 112.110362 | 0.448249 | 45.820071 | 7 | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2968 | 1 | zolo | 0.222625 | 0.547082 | 2.580991e+05 | 0.610240 | 0.143872 | 0.204206 | -11.295878 | 0.061088 | 125.494919 | 0.596155 | 33.778943 | 9 | 6 |
| 2969 | 0 | zouglou | 0.161000 | 0.863000 | 2.063200e+05 | 0.909000 | 0.000000 | 0.108000 | -5.985000 | 0.081300 | 119.038000 | 0.845000 | 58.000000 | 7 | 7 |
| 2970 | 1 | zouk | 0.263261 | 0.748889 | 3.060728e+05 | 0.622444 | 0.257227 | 0.089678 | -10.289222 | 0.038778 | 101.965222 | 0.824111 | 46.666667 | 5 | 8 |
| 2971 | 0 | zurich indie | 0.993000 | 0.705667 | 1.984173e+05 | 0.172667 | 0.468633 | 0.179667 | -11.453333 | 0.348667 | 91.278000 | 0.739000 | 0.000000 | 7 | 4 |
| 2972 | 1 | zydeco | 0.421038 | 0.629409 | 1.716717e+05 | 0.609369 | 0.019248 | 0.255877 | -9.854825 | 0.050491 | 126.366087 | 0.808544 | 30.261905 | 7 | 8 |
2973 rows × 15 columns
# Standardise the numeric track features, then group the tracks into 20
# clusters. A fixed random_state makes the cluster labels reproducible
# between runs. The verbose settings are kept, so fitting still prints the
# per-iteration inertia log.
song_cluster_pipeline = Pipeline(
    [
        ('scaler', StandardScaler()),
        ('kmeans', KMeans(n_clusters=20, random_state=42, verbose=2)),
    ],
    verbose=True,
)

# Use every numeric column as a feature, except the label column written
# below. If this cell is re-run, 'cluster_label' already exists and is
# numeric, so it would otherwise leak into both X and number_cols.
# NOTE: this rebinds X, which previously held the genre feature matrix.
X = spotify_data.select_dtypes(np.number).drop(
    columns='cluster_label', errors='ignore'
)

# Names of the feature columns, in the order the pipeline was fitted on.
number_cols = list(X.columns)
number_cols

song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
spotify_data['cluster_label'] = song_cluster_labels
[Pipeline] ............ (step 1 of 2) Processing scaler, total= 0.1s Initialization complete Iteration 0, inertia 1518431.2107395953 Iteration 1, inertia 1176719.0925323036 Iteration 2, inertia 1121839.8004857716 Iteration 3, inertia 1102035.0666062639 Iteration 4, inertia 1089771.9407152093 Iteration 5, inertia 1080722.2994542557 Iteration 6, inertia 1073522.1132717908 Iteration 7, inertia 1068280.7770367519 Iteration 8, inertia 1064824.5245022902 Iteration 9, inertia 1062666.099524576 Iteration 10, inertia 1061272.7287166417 Iteration 11, inertia 1060331.2738080064 Iteration 12, inertia 1059604.737659868 Iteration 13, inertia 1059159.2481732247 Iteration 14, inertia 1058852.1393562001 Iteration 15, inertia 1058630.6139641325 Iteration 16, inertia 1058497.489310806 Iteration 17, inertia 1058411.9682589525 Iteration 18, inertia 1058333.4947391595 Iteration 19, inertia 1058276.7663930529 Iteration 20, inertia 1058208.5289122937 Iteration 21, inertia 1058113.8050155211 Iteration 22, inertia 1058080.1120315497 Iteration 23, inertia 1058061.295989202 Iteration 24, inertia 1058054.4060224914 Iteration 25, inertia 1058049.6635927763 Iteration 26, inertia 1058046.3037710977 Iteration 27, inertia 1058044.0468948237 Converged at iteration 27: center shift 9.90817964533811e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1559384.1776030476 Iteration 1, inertia 1195634.6516553042 Iteration 2, inertia 1158825.8920438814 Iteration 3, inertia 1138544.2063523107 Iteration 4, inertia 1121156.4330490923 Iteration 5, inertia 1106988.7620414924 Iteration 6, inertia 1095832.4532479884 Iteration 7, inertia 1088902.9679148134 Iteration 8, inertia 1084667.0004423996 Iteration 9, inertia 1081632.420884549 Iteration 10, inertia 1079509.896174779 Iteration 11, inertia 1077785.2641181888 Iteration 12, inertia 1076335.5046003303 Iteration 13, inertia 1074987.4209390075 Iteration 14, inertia 1073576.025524011 Iteration 15, inertia 1072047.5792999435 Iteration 16, inertia 1070304.8317476106 Iteration 17, inertia 1068423.0928194448 Iteration 18, inertia 1066725.9993725605 Iteration 19, inertia 1065539.5708809209 Iteration 20, inertia 1064767.6904959816 Iteration 21, inertia 1064277.904192029 Iteration 22, inertia 1063987.3453930086 Iteration 23, inertia 1063802.4312042985 Iteration 24, inertia 1063669.3306529338 Iteration 25, inertia 1063571.6131669658 Iteration 26, inertia 1063489.2908760826 Iteration 27, inertia 1063420.3420008158 Iteration 28, inertia 1063355.9751168643 Iteration 29, inertia 1063287.4510769665 Iteration 30, inertia 1063224.5580447044 Iteration 31, inertia 1063155.9786223928 Iteration 32, inertia 1063086.4996239473 Iteration 33, inertia 1063007.8302863322 Iteration 34, inertia 1062909.2626284184 Iteration 35, inertia 1062776.4580679566 Iteration 36, inertia 1062586.3701343553 Iteration 37, inertia 1062342.819166396 Iteration 38, inertia 1061965.2691590665 Iteration 39, inertia 1061374.971711272 Iteration 40, inertia 1060630.665688638 Iteration 41, inertia 1059912.0603499413 Iteration 42, inertia 1059377.2810996866 Iteration 43, inertia 1059071.3595787743 Iteration 44, inertia 1058885.8259530005 Iteration 45, inertia 1058770.2782726677 Iteration 46, inertia 1058685.9776619256 Iteration 47, inertia 1058626.6251569279 Iteration 48, inertia 
1058573.0244780292 Iteration 49, inertia 1058516.8640162086 Iteration 50, inertia 1058455.5532407053 Iteration 51, inertia 1058385.8033518947 Iteration 52, inertia 1058305.6211516273 Iteration 53, inertia 1058195.5760436459 Iteration 54, inertia 1058034.0229115132 Iteration 55, inertia 1057821.3428421803 Iteration 56, inertia 1057561.734543161 Iteration 57, inertia 1057294.2388344912 Iteration 58, inertia 1057029.3986666582 Iteration 59, inertia 1056817.0081928251 Iteration 60, inertia 1056673.2028299319 Iteration 61, inertia 1056584.4836397655 Iteration 62, inertia 1056532.1476413126 Iteration 63, inertia 1056501.493979799 Iteration 64, inertia 1056484.615441137 Iteration 65, inertia 1056472.2155652544 Iteration 66, inertia 1056463.866086048 Iteration 67, inertia 1056458.4806436726 Iteration 68, inertia 1056454.8399684452 Iteration 69, inertia 1056451.6201729078 Iteration 70, inertia 1056448.7325498476 Iteration 71, inertia 1056445.8228834884 Iteration 72, inertia 1056442.7657642637 Iteration 73, inertia 1056439.2565826396 Iteration 74, inertia 1056434.9727333977 Iteration 75, inertia 1056431.9070099094 Iteration 76, inertia 1056428.8563668646 Iteration 77, inertia 1056426.0553972123 Iteration 78, inertia 1056424.040204331 Converged at iteration 78: center shift 8.294355896458048e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1481004.6992672693 Iteration 1, inertia 1151723.5111618347 Iteration 2, inertia 1118025.4533933988 Iteration 3, inertia 1106926.274026199 Iteration 4, inertia 1101438.367066885 Iteration 5, inertia 1098170.0501537628 Iteration 6, inertia 1096066.9778469112 Iteration 7, inertia 1093959.3575540395 Iteration 8, inertia 1091261.8972434495 Iteration 9, inertia 1087336.521086591 Iteration 10, inertia 1082357.7996503597 Iteration 11, inertia 1079089.9355448065 Iteration 12, inertia 1077805.950502848 Iteration 13, inertia 1077052.167387624 Iteration 14, inertia 1076374.3672525089 Iteration 15, inertia 1075710.3080849806 Iteration 16, inertia 1075017.7497480959 Iteration 17, inertia 1074434.2008408068 Iteration 18, inertia 1073975.671423709 Iteration 19, inertia 1073645.7618919485 Iteration 20, inertia 1073418.4069973608 Iteration 21, inertia 1073262.0329617548 Iteration 22, inertia 1073147.0415727037 Iteration 23, inertia 1073062.525556698 Iteration 24, inertia 1072992.9295552443 Iteration 25, inertia 1072932.226669553 Iteration 26, inertia 1072866.292928011 Iteration 27, inertia 1072793.9933869515 Iteration 28, inertia 1072710.6318784188 Iteration 29, inertia 1072594.1928294217 Iteration 30, inertia 1072434.3678153679 Iteration 31, inertia 1072188.4111979539 Iteration 32, inertia 1071837.470523644 Iteration 33, inertia 1071407.8583750606 Iteration 34, inertia 1070985.771902862 Iteration 35, inertia 1070615.7857233952 Iteration 36, inertia 1070315.133106174 Iteration 37, inertia 1070071.6954434349 Iteration 38, inertia 1069813.432080524 Iteration 39, inertia 1069513.1026847577 Iteration 40, inertia 1069154.5258114587 Iteration 41, inertia 1068720.6721533719 Iteration 42, inertia 1068218.6447184903 Iteration 43, inertia 1067578.4989955402 Iteration 44, inertia 1066774.3121690569 Iteration 45, inertia 1065768.4339077924 Iteration 46, inertia 1064614.8485870115 Iteration 47, inertia 1063583.8515457623 Iteration 48, inertia 
1062913.9978372105 Iteration 49, inertia 1062643.9768895397 Iteration 50, inertia 1062536.2095102891 Iteration 51, inertia 1062480.1934463005 Iteration 52, inertia 1062442.7308576854 Iteration 53, inertia 1062421.9193580071 Iteration 54, inertia 1062407.4635885195 Iteration 55, inertia 1062396.355058509 Iteration 56, inertia 1062388.8980699282 Iteration 57, inertia 1062383.6123284558 Iteration 58, inertia 1062379.8840699268 Iteration 59, inertia 1062376.7716375433 Iteration 60, inertia 1062374.2867137413 Iteration 61, inertia 1062372.062373369 Converged at iteration 61: center shift 9.561298688633168e-05 within tolerance 0.00010000000000000789. Initialization complete Iteration 0, inertia 1533481.9187964187 Iteration 1, inertia 1141022.7833333153 Iteration 2, inertia 1111155.4020968876 Iteration 3, inertia 1099516.2178008854 Iteration 4, inertia 1093500.488288347 Iteration 5, inertia 1090108.9627420371 Iteration 6, inertia 1087741.653824915 Iteration 7, inertia 1085755.184394873 Iteration 8, inertia 1083693.3415515418 Iteration 9, inertia 1081859.096344771 Iteration 10, inertia 1081227.4057550728 Iteration 11, inertia 1080686.6846978166 Iteration 12, inertia 1080131.350464464 Iteration 13, inertia 1079547.3679792145 Iteration 14, inertia 1078951.2425748568 Iteration 15, inertia 1078381.764592131 Iteration 16, inertia 1077824.6682510613 Iteration 17, inertia 1077261.1216450408 Iteration 18, inertia 1076671.9330196865 Iteration 19, inertia 1076111.6391608259 Iteration 20, inertia 1075593.556684215 Iteration 21, inertia 1075076.7548978366 Iteration 22, inertia 1074614.4708625488 Iteration 23, inertia 1074198.2815987836 Iteration 24, inertia 1073811.151690962 Iteration 25, inertia 1073489.2345411803 Iteration 26, inertia 1073216.5376026134 Iteration 27, inertia 1072997.4177629729 Iteration 28, inertia 1072837.4388617678 Iteration 29, inertia 1072720.314760919 Iteration 30, inertia 1072626.0907780563 Iteration 31, inertia 1072551.4392313287 Iteration 32, inertia 
1072491.3405777218 Iteration 33, inertia 1072439.5309630686 Iteration 34, inertia 1072394.4293159195 Iteration 35, inertia 1072351.9268389586 Iteration 36, inertia 1072308.3199412385 Iteration 37, inertia 1072261.0380787032 Iteration 38, inertia 1072218.5762760725 Iteration 39, inertia 1072179.0849450931 Iteration 40, inertia 1072142.9535733769 Iteration 41, inertia 1072104.4283442306 Iteration 42, inertia 1072061.5394010097 Iteration 43, inertia 1072006.5554972412 Iteration 44, inertia 1071942.7896558691 Iteration 45, inertia 1071880.2072995566 Iteration 46, inertia 1071844.3775986482 Iteration 47, inertia 1071821.8696546457 Iteration 48, inertia 1071800.742329854 Iteration 49, inertia 1071782.065081264 Iteration 50, inertia 1071761.8704620854 Iteration 51, inertia 1071744.9171639706 Iteration 52, inertia 1071730.944002384 Iteration 53, inertia 1071718.896431847 Iteration 54, inertia 1071707.72482154 Iteration 55, inertia 1071697.0325495445 Iteration 56, inertia 1071686.204887337 Iteration 57, inertia 1071670.307996194 Iteration 58, inertia 1071651.5522898566 Iteration 59, inertia 1071635.9610689697 Iteration 60, inertia 1071619.7618995504 Iteration 61, inertia 1071603.462529547 Iteration 62, inertia 1071588.4260267755 Iteration 63, inertia 1071573.7711582186 Iteration 64, inertia 1071558.7130277792 Iteration 65, inertia 1071547.993367073 Iteration 66, inertia 1071539.4740874527 Iteration 67, inertia 1071530.4252345695 Iteration 68, inertia 1071520.6202985893 Iteration 69, inertia 1071513.1100882855 Iteration 70, inertia 1071505.071922007 Iteration 71, inertia 1071498.3739023365 Iteration 72, inertia 1071493.5422717195 Iteration 73, inertia 1071489.681235108 Iteration 74, inertia 1071487.4670391225 Converged at iteration 74: center shift 8.717316594747099e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1528040.2246529965 Iteration 1, inertia 1132014.441343659 Iteration 2, inertia 1096128.4818956906 Iteration 3, inertia 1083533.3974377345 Iteration 4, inertia 1078865.6156553838 Iteration 5, inertia 1076121.4486851885 Iteration 6, inertia 1073558.5157769932 Iteration 7, inertia 1071107.7220839143 Iteration 8, inertia 1069102.9029251868 Iteration 9, inertia 1067622.7586647086 Iteration 10, inertia 1066568.794179264 Iteration 11, inertia 1065848.4040802477 Iteration 12, inertia 1065343.9059031007 Iteration 13, inertia 1064945.5080586167 Iteration 14, inertia 1064595.9972997725 Iteration 15, inertia 1064243.0981610145 Iteration 16, inertia 1063878.4567679302 Iteration 17, inertia 1063518.894244183 Iteration 18, inertia 1063126.5469427484 Iteration 19, inertia 1062704.494770205 Iteration 20, inertia 1062232.4549200027 Iteration 21, inertia 1061759.4127827992 Iteration 22, inertia 1061296.355658263 Iteration 23, inertia 1060897.1701654126 Iteration 24, inertia 1060543.8760499202 Iteration 25, inertia 1060266.017233957 Iteration 26, inertia 1060046.9607272744 Iteration 27, inertia 1059871.68806692 Iteration 28, inertia 1059739.9966508104 Iteration 29, inertia 1059645.3738772452 Iteration 30, inertia 1059572.214740185 Iteration 31, inertia 1059520.1092372953 Iteration 32, inertia 1059476.8590250073 Iteration 33, inertia 1059438.98927601 Iteration 34, inertia 1059408.330542772 Iteration 35, inertia 1059378.964154699 Iteration 36, inertia 1059356.676373178 Iteration 37, inertia 1059339.0079398362 Iteration 38, inertia 1059322.599733904 Iteration 39, inertia 1059305.6591112644 Iteration 40, inertia 1059289.183936796 Iteration 41, inertia 1059270.5782940204 Iteration 42, inertia 1059256.244436124 Iteration 43, inertia 1059237.6365943574 Iteration 44, inertia 1059217.527012061 Iteration 45, inertia 1059200.159859976 Iteration 46, inertia 1059184.4427141547 Iteration 47, inertia 1059169.497724323 Iteration 48, inertia 
1059156.0230105342 Iteration 49, inertia 1059144.7047525654 Iteration 50, inertia 1059135.0483952186 Iteration 51, inertia 1059125.2469544273 Iteration 52, inertia 1059116.1608971434 Iteration 53, inertia 1059106.096855189 Iteration 54, inertia 1059093.6162468693 Iteration 55, inertia 1059081.2509976227 Iteration 56, inertia 1059071.336415391 Iteration 57, inertia 1059063.7248753135 Iteration 58, inertia 1059056.5072381685 Iteration 59, inertia 1059047.4995729045 Iteration 60, inertia 1059037.7777436632 Iteration 61, inertia 1059030.2554825335 Iteration 62, inertia 1059025.1158830528 Iteration 63, inertia 1059021.0003963353 Iteration 64, inertia 1059016.605331854 Iteration 65, inertia 1059012.2818199464 Iteration 66, inertia 1059007.748622384 Iteration 67, inertia 1059003.1868528523 Iteration 68, inertia 1058998.866969727 Iteration 69, inertia 1058994.170736548 Iteration 70, inertia 1058989.3014590035 Iteration 71, inertia 1058984.815117142 Iteration 72, inertia 1058980.2867537728 Iteration 73, inertia 1058976.2158306516 Iteration 74, inertia 1058972.4788188383 Iteration 75, inertia 1058968.8729347764 Iteration 76, inertia 1058964.9579482079 Iteration 77, inertia 1058960.290779928 Iteration 78, inertia 1058954.2704991892 Iteration 79, inertia 1058947.5497043321 Iteration 80, inertia 1058941.9138010992 Iteration 81, inertia 1058937.622062571 Iteration 82, inertia 1058933.9666619385 Iteration 83, inertia 1058930.9071479084 Iteration 84, inertia 1058928.4332417247 Iteration 85, inertia 1058926.473769887 Converged at iteration 85: center shift 9.4595908616319e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1543090.5000349204 Iteration 1, inertia 1166324.3616256032 Iteration 2, inertia 1119224.5490564962 Iteration 3, inertia 1099799.5381588547 Iteration 4, inertia 1090897.1404651632 Iteration 5, inertia 1086576.1927144395 Iteration 6, inertia 1083438.8237490046 Iteration 7, inertia 1080391.611521363 Iteration 8, inertia 1077304.5908761937 Iteration 9, inertia 1074496.3241475555 Iteration 10, inertia 1072071.4839542503 Iteration 11, inertia 1069896.3185046273 Iteration 12, inertia 1067914.562606338 Iteration 13, inertia 1066318.316520494 Iteration 14, inertia 1065155.5177455682 Iteration 15, inertia 1064465.394332949 Iteration 16, inertia 1064087.9557392378 Iteration 17, inertia 1063895.3570493439 Iteration 18, inertia 1063798.9477325582 Iteration 19, inertia 1063748.1761253418 Iteration 20, inertia 1063714.4506418793 Iteration 21, inertia 1063692.7882838193 Iteration 22, inertia 1063676.089419221 Iteration 23, inertia 1063664.09702477 Iteration 24, inertia 1063655.030625566 Iteration 25, inertia 1063648.0774148665 Iteration 26, inertia 1063643.4748189265 Iteration 27, inertia 1063639.3800228618 Iteration 28, inertia 1063634.963167905 Iteration 29, inertia 1063631.3119743099 Iteration 30, inertia 1063628.6105679066 Iteration 31, inertia 1063626.602572974 Converged at iteration 31: center shift 9.386443328024657e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1527430.9989983155 Iteration 1, inertia 1189477.661161892 Iteration 2, inertia 1148667.4448779915 Iteration 3, inertia 1131622.7503587177 Iteration 4, inertia 1121059.359653391 Iteration 5, inertia 1108791.408601461 Iteration 6, inertia 1092582.094986319 Iteration 7, inertia 1082379.1128686229 Iteration 8, inertia 1076073.621133802 Iteration 9, inertia 1071595.0938703306 Iteration 10, inertia 1068583.5183418733 Iteration 11, inertia 1066425.8086418065 Iteration 12, inertia 1064202.5906458988 Iteration 13, inertia 1062854.871153981 Iteration 14, inertia 1062342.6678998582 Iteration 15, inertia 1062121.1253384447 Iteration 16, inertia 1062002.6283166695 Iteration 17, inertia 1061941.40448264 Iteration 18, inertia 1061907.29805096 Iteration 19, inertia 1061885.5922303603 Iteration 20, inertia 1061872.2726760267 Iteration 21, inertia 1061863.3048171166 Iteration 22, inertia 1061857.068581753 Iteration 23, inertia 1061851.6947927603 Iteration 24, inertia 1061847.484437047 Iteration 25, inertia 1061845.4281391136 Iteration 26, inertia 1061843.9845963155 Iteration 27, inertia 1061842.7092984377 Converged at iteration 27: center shift 6.317482042283155e-05 within tolerance 0.00010000000000000789. 
Initialization complete Iteration 0, inertia 1530010.1767161565 Iteration 1, inertia 1159639.7293103684 Iteration 2, inertia 1112482.6460443856 Iteration 3, inertia 1093082.351264284 Iteration 4, inertia 1083981.2656860645 Iteration 5, inertia 1079032.6999509674 Iteration 6, inertia 1076208.6318160724 Iteration 7, inertia 1074385.7596911436 Iteration 8, inertia 1073195.3632660937 Iteration 9, inertia 1072076.6445564027 Iteration 10, inertia 1070853.6411840897 Iteration 11, inertia 1069680.7558374836 Iteration 12, inertia 1068722.5790395476 Iteration 13, inertia 1067988.3841193928 Iteration 14, inertia 1067356.2659693686 Iteration 15, inertia 1066712.180186866 Iteration 16, inertia 1066056.9153973889 Iteration 17, inertia 1065414.9024735999 Iteration 18, inertia 1064785.5843123326 Iteration 19, inertia 1064230.5987404934 Iteration 20, inertia 1063786.7603719388 Iteration 21, inertia 1063462.6357750278 Iteration 22, inertia 1063233.2777044037 Iteration 23, inertia 1063074.9347412293 Iteration 24, inertia 1062968.649961844 Iteration 25, inertia 1062884.2623089 Iteration 26, inertia 1062801.0268068856 Iteration 27, inertia 1062708.6831022594 Iteration 28, inertia 1062606.6605326962 Iteration 29, inertia 1062487.4745810644 Iteration 30, inertia 1062344.6313244142 Iteration 31, inertia 1062189.857764195 Iteration 32, inertia 1062034.5844359356 Iteration 33, inertia 1061885.7555336356 Iteration 34, inertia 1061770.172910868 Iteration 35, inertia 1061673.2146344283 Iteration 36, inertia 1061597.4049100603 Iteration 37, inertia 1061537.4300182697 Iteration 38, inertia 1061483.4165872629 Iteration 39, inertia 1061436.8619644928 Iteration 40, inertia 1061395.730660208 Iteration 41, inertia 1061361.6492275698 Iteration 42, inertia 1061334.5089811084 Iteration 43, inertia 1061309.9706417432 Iteration 44, inertia 1061285.9829691774 Iteration 45, inertia 1061266.2001872212 Iteration 46, inertia 1061251.5458898372 Iteration 47, inertia 1061241.1361962438 Iteration 48, inertia 
1061232.0875043431 Iteration 49, inertia 1061225.7932694745 Iteration 50, inertia 1061219.086442478 Iteration 51, inertia 1061211.356667377 Iteration 52, inertia 1061204.0193982716 Iteration 53, inertia 1061196.2735246203 Iteration 54, inertia 1061189.9466036186 Iteration 55, inertia 1061184.9722401784 Iteration 56, inertia 1061180.7611597797 Iteration 57, inertia 1061177.7977453128 Iteration 58, inertia 1061175.6350618722 Iteration 59, inertia 1061173.6837396577 Iteration 60, inertia 1061171.8742801761 Iteration 61, inertia 1061169.728116058 Iteration 62, inertia 1061167.6811046973 Iteration 63, inertia 1061166.0375455136 Converged at iteration 63: center shift 9.567525712380937e-05 within tolerance 0.00010000000000000789. Initialization complete Iteration 0, inertia 1485366.34937874 Iteration 1, inertia 1166914.313650583 Iteration 2, inertia 1109185.6074101632 Iteration 3, inertia 1089592.468896896 Iteration 4, inertia 1082071.5882838317 Iteration 5, inertia 1077532.6295956601 Iteration 6, inertia 1074092.8014361109 Iteration 7, inertia 1071168.9119014665 Iteration 8, inertia 1068851.0039151972 Iteration 9, inertia 1067066.053030216 Iteration 10, inertia 1065704.3627450417 Iteration 11, inertia 1064727.908979444 Iteration 12, inertia 1063929.7708738698 Iteration 13, inertia 1063092.0022536183 Iteration 14, inertia 1062244.8513246507 Iteration 15, inertia 1061365.7790582113 Iteration 16, inertia 1060559.2299659806 Iteration 17, inertia 1060005.3881050162 Iteration 18, inertia 1059667.8845229698 Iteration 19, inertia 1059451.7571624236 Iteration 20, inertia 1059313.6659550834 Iteration 21, inertia 1059219.4251809565 Iteration 22, inertia 1059159.4900256381 Iteration 23, inertia 1059106.7873514341 Iteration 24, inertia 1059055.1664184304 Iteration 25, inertia 1059000.2897709426 Iteration 26, inertia 1058964.6554665957 Iteration 27, inertia 1058946.478830107 Iteration 28, inertia 1058933.7910261266 Iteration 29, inertia 1058922.2496031793 Iteration 30, inertia 
1058913.1677040963 Iteration 31, inertia 1058904.5710288961 Iteration 32, inertia 1058894.9261501804 Iteration 33, inertia 1058885.0855612298 Iteration 34, inertia 1058876.0873081826 Iteration 35, inertia 1058866.8290601566 Iteration 36, inertia 1058856.420964137 Iteration 37, inertia 1058844.40159226 Iteration 38, inertia 1058830.957432061 Iteration 39, inertia 1058816.2007536655 Iteration 40, inertia 1058802.2619843865 Iteration 41, inertia 1058788.8399013635 Iteration 42, inertia 1058775.0185238845 Iteration 43, inertia 1058761.9433659778 Iteration 44, inertia 1058749.8952934751 Iteration 45, inertia 1058736.4358992109 Iteration 46, inertia 1058721.7671516917 Iteration 47, inertia 1058705.438174019 Iteration 48, inertia 1058687.3477819543 Iteration 49, inertia 1058667.74306436 Iteration 50, inertia 1058647.462391039 Iteration 51, inertia 1058626.2776998058 Iteration 52, inertia 1058602.3496328378 Iteration 53, inertia 1058571.0571717173 Iteration 54, inertia 1058533.6220375071 Iteration 55, inertia 1058493.718234758 Iteration 56, inertia 1058450.9670890549 Iteration 57, inertia 1058411.8092976185 Iteration 58, inertia 1058372.0378341132 Iteration 59, inertia 1058331.5588077034 Iteration 60, inertia 1058293.9539806114 Iteration 61, inertia 1058262.6127494525 Iteration 62, inertia 1058233.307646366 Iteration 63, inertia 1058204.9140189707 Iteration 64, inertia 1058180.5723503446 Iteration 65, inertia 1058157.4309788363 Iteration 66, inertia 1058139.2944931553 Iteration 67, inertia 1058124.9595231672 Iteration 68, inertia 1058111.4891680724 Iteration 69, inertia 1058099.3015343037 Iteration 70, inertia 1058087.1314855064 Iteration 71, inertia 1058074.3424989572 Iteration 72, inertia 1058062.2126820935 Iteration 73, inertia 1058048.574945352 Iteration 74, inertia 1058032.1363788838 Iteration 75, inertia 1058014.9218950823 Iteration 76, inertia 1057999.021546376 Iteration 77, inertia 1057980.6384130116 Iteration 78, inertia 1057959.1088955405 Iteration 79, inertia 
1057932.288432137 Iteration 80, inertia 1057898.7485262025 Iteration 81, inertia 1057859.0110090794 Iteration 82, inertia 1057817.3759197462 Iteration 83, inertia 1057775.0948727496 Iteration 84, inertia 1057736.7018026111 Iteration 85, inertia 1057697.586648922 Iteration 86, inertia 1057660.3143450823 Iteration 87, inertia 1057625.8905078045 Iteration 88, inertia 1057589.7207763102 Iteration 89, inertia 1057550.2113983117 Iteration 90, inertia 1057501.2912635861 Iteration 91, inertia 1057449.808344935 Iteration 92, inertia 1057394.494191321 Iteration 93, inertia 1057339.1789444939 Iteration 94, inertia 1057287.4364556733 Iteration 95, inertia 1057238.1434121171 Iteration 96, inertia 1057187.3680744208 Iteration 97, inertia 1057137.4038580717 Iteration 98, inertia 1057093.4916064262 Iteration 99, inertia 1057051.8725348955 Iteration 100, inertia 1057010.7370654652 Iteration 101, inertia 1056968.8927394166 Iteration 102, inertia 1056928.2504798614 Iteration 103, inertia 1056892.2883706912 Iteration 104, inertia 1056858.581822213 Iteration 105, inertia 1056829.7512072735 Iteration 106, inertia 1056803.3847363575 Iteration 107, inertia 1056782.0109399306 Iteration 108, inertia 1056764.6385593202 Iteration 109, inertia 1056749.464780469 Iteration 110, inertia 1056737.1231428802 Iteration 111, inertia 1056727.4188840124 Iteration 112, inertia 1056720.0695550372 Iteration 113, inertia 1056713.917640718 Iteration 114, inertia 1056708.5976807354 Iteration 115, inertia 1056705.2803011325 Iteration 116, inertia 1056701.9135425468 Iteration 117, inertia 1056697.8815472107 Iteration 118, inertia 1056693.8380570295 Iteration 119, inertia 1056689.9871340892 Iteration 120, inertia 1056686.0548176991 Iteration 121, inertia 1056682.48471731 Iteration 122, inertia 1056679.2074778152 Iteration 123, inertia 1056676.1694588363 Iteration 124, inertia 1056672.5157562606 Iteration 125, inertia 1056668.277051697 Iteration 126, inertia 1056664.1859588255 Iteration 127, inertia 
1056660.9171111616 Iteration 128, inertia 1056657.8133843595 Iteration 129, inertia 1056654.7874933477 Iteration 130, inertia 1056651.9978181226 Iteration 131, inertia 1056649.683152547 Iteration 132, inertia 1056647.6377632297 Iteration 133, inertia 1056645.6947906385 Converged at iteration 133: center shift 9.970076741224624e-05 within tolerance 0.00010000000000000789. Initialization complete Iteration 0, inertia 1533726.2830152828 Iteration 1, inertia 1135317.3774418416 Iteration 2, inertia 1099930.2580672416 Iteration 3, inertia 1085748.9516291567 Iteration 4, inertia 1078421.0872950258 Iteration 5, inertia 1074224.463333758 Iteration 6, inertia 1071223.7175547474 Iteration 7, inertia 1068673.7846402095 Iteration 8, inertia 1066213.7900457538 Iteration 9, inertia 1064011.2861757297 Iteration 10, inertia 1062369.7206848466 Iteration 11, inertia 1061361.0495962086 Iteration 12, inertia 1060745.0512778722 Iteration 13, inertia 1060354.669498197 Iteration 14, inertia 1060090.1435792325 Iteration 15, inertia 1059892.8804547815 Iteration 16, inertia 1059736.049663461 Iteration 17, inertia 1059613.105757206 Iteration 18, inertia 1059519.1006844048 Iteration 19, inertia 1059447.8909466825 Iteration 20, inertia 1059396.200936344 Iteration 21, inertia 1059359.1100332565 Iteration 22, inertia 1059331.7554745025 Iteration 23, inertia 1059311.2234102066 Iteration 24, inertia 1059292.445693493 Iteration 25, inertia 1059277.1914786512 Iteration 26, inertia 1059264.9347930485 Iteration 27, inertia 1059255.817714924 Iteration 28, inertia 1059247.5494275303 Iteration 29, inertia 1059240.2534132998 Iteration 30, inertia 1059234.1783999188 Iteration 31, inertia 1059229.3602603008 Iteration 32, inertia 1059225.632075968 Iteration 33, inertia 1059222.8040312969 Iteration 34, inertia 1059220.101204206 Iteration 35, inertia 1059217.3366438097 Iteration 36, inertia 1059215.080626023 Iteration 37, inertia 1059213.1263108135 Converged at iteration 37: center shift 7.546514179991207e-05 
within tolerance 0.00010000000000000789. [Pipeline] ............ (step 2 of 2) Processing kmeans, total= 25.6s
# Display the DataFrame as the cell's output to inspect it; the rendered table
# that follows includes a `cluster_label` column alongside the original ones.
spotify_data
| | valence | year | acousticness | artists | danceability | duration_ms | energy | explicit | id | instrumentalness | key | liveness | loudness | mode | name | popularity | release_date | speechiness | tempo | cluster_label |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0594 | 1921 | 0.98200 | ['Sergei Rachmaninoff', 'James Levine', 'Berli... | 0.279 | 831667 | 0.211 | 0 | 4BJqT0PrAfrxzMOxytFOIz | 0.878000 | 10 | 0.6650 | -20.096 | 1 | Piano Concerto No. 3 in D Minor, Op. 30: III. ... | 4 | 1921 | 0.0366 | 80.954 | 12 |
| 1 | 0.9630 | 1921 | 0.73200 | ['Dennis Day'] | 0.819 | 180533 | 0.341 | 0 | 7xPhfUan2yNtyFG0cUWkt8 | 0.000000 | 7 | 0.1600 | -12.441 | 1 | Clancy Lowered the Boom | 5 | 1921 | 0.4150 | 60.936 | 11 |
| 2 | 0.0394 | 1921 | 0.96100 | ['KHP Kridhamardawa Karaton Ngayogyakarta Hadi... | 0.328 | 500062 | 0.166 | 0 | 1o6I8BglA6ylDMrIELygv1 | 0.913000 | 3 | 0.1010 | -14.850 | 1 | Gati Bali | 5 | 1921 | 0.0339 | 110.339 | 3 |
| 3 | 0.1650 | 1921 | 0.96700 | ['Frank Parker'] | 0.275 | 210000 | 0.309 | 0 | 3ftBPsC5vPBKxYSee08FDH | 0.000028 | 5 | 0.3810 | -9.316 | 1 | Danny Boy | 3 | 1921 | 0.0354 | 100.109 | 17 |
| 4 | 0.2530 | 1921 | 0.95700 | ['Phil Regan'] | 0.418 | 166693 | 0.193 | 0 | 4d6HGyGT8e121BsdKmw9v6 | 0.000002 | 3 | 0.2290 | -10.096 | 1 | When Irish Eyes Are Smiling | 2 | 1921 | 0.0380 | 101.665 | 17 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 170648 | 0.6080 | 2020 | 0.08460 | ['Anuel AA', 'Daddy Yankee', 'KAROL G', 'Ozuna... | 0.786 | 301714 | 0.808 | 0 | 0KkIkfsLEJbrcIhYsCL7L5 | 0.000289 | 7 | 0.0822 | -3.702 | 1 | China | 72 | 2020-05-29 | 0.0881 | 105.029 | 0 |
| 170649 | 0.7340 | 2020 | 0.20600 | ['Ashnikko'] | 0.717 | 150654 | 0.753 | 0 | 0OStKKAuXlxA0fMH54Qs6E | 0.000000 | 7 | 0.1010 | -6.020 | 1 | Halloweenie III: Seven Days | 68 | 2020-10-23 | 0.0605 | 137.936 | 4 |
| 170650 | 0.6370 | 2020 | 0.10100 | ['MAMAMOO'] | 0.634 | 211280 | 0.858 | 0 | 4BZXVFYCb76Q0Klojq4piV | 0.000009 | 4 | 0.2580 | -2.226 | 0 | AYA | 76 | 2020-11-03 | 0.0809 | 91.688 | 16 |
| 170651 | 0.1950 | 2020 | 0.00998 | ['Eminem'] | 0.671 | 337147 | 0.623 | 1 | 5SiZJoLXp3WOl3J4C8IK0d | 0.000008 | 2 | 0.6430 | -7.161 | 1 | Darkness | 70 | 2020-01-17 | 0.3080 | 75.055 | 14 |
| 170652 | 0.6420 | 2020 | 0.13200 | ['KEVVO', 'J Balvin'] | 0.856 | 189507 | 0.721 | 1 | 7HmnJHfs0BkFzX4x8j0hkl | 0.004710 | 7 | 0.1820 | -4.928 | 1 | Billetes Azules (with J Balvin) | 74 | 2020-10-16 | 0.1080 | 94.991 | 14 |
170653 rows × 20 columns
from sklearn.decomposition import PCA

# Two-step pipeline: standardize the inputs, then reduce them to two principal
# components so every row of X gets an (x, y) coordinate.
# NOTE(review): X, Pipeline and StandardScaler come from earlier cells that are
# not visible here -- X is presumably the numeric song-feature matrix; confirm.
pca_pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('PCA', PCA(n_components=2)),
    ]
)
song_embedding = pca_pipeline.fit_transform(X)

# Pair each 2-D point with the song's name and its cluster label. The extra
# columns are aligned to the embedding rows by index, as with direct assignment.
projection = pd.DataFrame(song_embedding, columns=['x', 'y']).assign(
    title=spotify_data['name'],
    cluster=spotify_data['cluster_label'],
)
import plotly.express as px

# Scatter plot of the 2-D projection, colored by the `cluster` column; the
# hover tooltip lists the coordinates and the song title.
fig = px.scatter(
    data_frame=projection,
    x='x',
    y='y',
    color='cluster',
    hover_data=['x', 'y', 'title'],
)
fig.show()